python programming cheatsheet.html
time & space complexities
strings
- creating a substring:
O(n)
time
libraries
csv
Standard library module for reading/write CSV files
# example usage
# standard reading line by line
with open('example.csv', mode='r') as file:
= csv.reader(file)
reader for row in reader:
print(row)
# read with dict reader to auto parse header row
with open('example.csv', mode='r', newline='') as file:
= csv.DictReader(file)
reader = reader.fieldnames
headers for row in reader:
print(row) # row is a dictionary with header names as keys
# write out data
= [
data "Name", "Age", "City"],
["Alice", 30, "New York"],
["Bob", 25, "San Francisco"]
[
]with open('output.csv', mode='w', newline='') as file:
= csv.writer(file)
writer writer.writerows(data)
datetime
A useful library for parsing formatted timestamp strings.
# example usage
from datetime import datetime, timedelta
= datetime.fromisoformat('2019-01-04T16:41:24+02:00')
ts
# compute new ts
= ts - timedelta(minutes=5)
ts_before
# example of implementing a 5 minute sliding window
# through a stream of timestamped "logs"
#
# GOAL: find "servers" with >= 3 error logs over any
# 5 minute time window
def parse_logs(logs: list[str]) -> Iterator[tuple[datetime, str, str]]:
"""
Assume log format: [2024-12-13T10:30:00Z] server1 INFO "Service started"
"""
for log in logs:
= log.split(" ")
split_log = split_log[0]
ts_str = split_log[1]
server = split_log[2]
severity yield (
1 : len(ts_str) - 1]),
datetime.fromisoformat(ts_str[
server,
severity,
)
def find_servers_with_recurring_errors(
list[str], error_window_minutes: int = 5, error_threshold: int = 3
logs: -> list[str]:
) dict[str, Deque[datetime]] = defaultdict(Deque)
server_logs: set[str] = set()
errored_servers:
for ts, server, severity in parse_logs(logs):
= server_logs[server]
curr_errors
if severity == "ERROR":
curr_errors.append(ts)
= ts - timedelta(minutes=error_window_minutes)
window_start while curr_errors and curr_errors[0] < window_start:
curr_errors.popleft()
if len(curr_errors) >= error_threshold:
errored_servers.add(server)
return sorted(list(errored_servers))
requests
functools
Useful tools for working with functions
cache
A decorator that can be used to memoize the results of a function:
def wordBreak(self, s: str, wordDict: List[str]) -> bool:
@cache
def dp(i):
if i < 0:
return True
for word in wordDict:
if s[i - len(word) + 1 : i + 1] == word and dp(i - len(word)):
return True
return False
return dp(len(s) - 1)
gotchas
truthiness
Be wary of python truthy/falsey values! E.g., the following has a subtle bug:
def maxSubArray(self, nums: List[int]) -> int:
= 0
begin = 0
end = 0
curr_sum = None
max_sum
while end < len(nums):
+= nums[end]
curr_sum
if not max_sum or curr_sum > max_sum:
= curr_sum
max_sum
+= 1
end while curr_sum < 0 and begin < end:
-= nums[begin]
curr_sum += 1
begin
return max_sum
Consider the input [-1, 0, -1]
; the answer should be
0
. But if we set max_sum
to 0
,
the condition not max_sum
is TRUE, because 0
is Falsey. The correct condition to check is
if max_sum is None ... :
heapq priority conflicts
A pattern suggested by the python standard library docs for
implementing a priority queue is to insert items into a heap as
(priority, item)
tuples. But what happens if there is a tie
in priority?
In the scenario, the heapq
library falls back to
comparing the second elements of the tuples; in the case, the items
themselves. This can lead to undesirable outcomes. If the elements do
not have a comparison function, an exception will be raised. Bad! If
they do, the resulting ordering may not semantically match what is
desired. For example, what if we want to maintain that elements are
returned to use in the same order in which they were inserted?
The recommended solution is to insert items as tuples of size 3, of
the form (priority, counter, item)
. The counter is a simple
increasing integer. Immediately we get ordering based on when an item is
inserted, and ties broken by a datum with a defined comparator. See the
following example solution, which merges k sorted linked lists using a
heap to determine which node should be inserted next:
# Definition for singly-linked list.
# class ListNode:
# def __init__(self, val=0, next=None):
# self.val = val
# self.next = next
class Solution:
def mergeKLists(self, lists: List[Optional[ListNode]]) -> Optional[ListNode]:
if not lists:
return None
= []
curr_ptrs = itertools.count()
counter for head in lists:
if head:
next(counter), head))
heapq.heappush(curr_ptrs, (head.val,
= None
head = None
tail while curr_ptrs:
= heapq.heappop(curr_ptrs)
_, _, min_node = ListNode(min_node.val, None)
new_node
if not tail:
= new_node
head = head
tail else:
next = new_node
tail.= new_node
tail
if min_node.next:
= min_node.next
next_node next(counter), next_node))
heapq.heappush(curr_ptrs, (next_node.val,
return head
Alternatively, we can insert into our heap a custom class with a defined comparator:
class HeapNode:
def __init__(self, node):
self.node = node
def __lt__(self, other):
# Define comparison based on ListNode's value
return self.node.val < other.node.val
= []
heap 1))) heapq.heappush(heap, HeapNode(ListNode(